#load raw data KEGG annotations: configs based gene abundances
kegg_abund_raw<- read.csv("kegg_abund_raw_t.csv")
Sample_id<- read.csv("Sample_ID.csv")
env_data<- read.csv("env_data.csv")
# load prodigal output of total protein coding genes to normalise data
prodigal_output_kegg_abund<- read.csv("prodigal_output_kegg.csv")
kegg_abund<- kegg_abund_raw %>%
left_join(x=.,y=prodigal_output_kegg_abund, by=c("Sample"))
kegg_abund_gather<- kegg_abund %>%
gather(key = "KO", value = "reads",
K00003:K24042 )
kegg_abund_gather$reads<- as.numeric(kegg_abund_gather$reads)
#normalise: gene abundances as reads divided by total per sample prodigal output multiplied by mean all sample prodigal output
kegg_abund_gather<- kegg_abund_gather %>%
mutate(reads_norm = (reads/prodigal_output)*57474057.89)
kegg_abund_gather<- kegg_abund_gather %>%
left_join(x=.,y=Sample_id, by=c("Sample"))
# pivot data by site (for faster execution)
kegg_abund_mig<- kegg_abund_gather %>%
filter(Site == "Migneint") %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
kegg_abund_moor<- kegg_abund_gather %>%
filter(Site == "Moors_House") %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
kegg_abund_cro<- kegg_abund_gather %>%
filter(Site == "Crocach") %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
kegg_abund_bal<- kegg_abund_gather %>%
filter(Site == "Balmoral") %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
kegg_abund_bow<- kegg_abund_gather %>%
filter(Site == "Bowness") %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
kegg_abund_lang<- kegg_abund_gather %>%
filter(Site == "Langwell" ) %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
kegg_abund_stean<- kegg_abund_gather %>%
filter(Site == "Stean") %>%
select(Sample,KO,reads_norm) %>%
pivot_wider(names_from = KO, values_from = reads_norm)
# dataframe to select genes of interest
kegg_abund <- rbind(kegg_abund_bal, kegg_abund_bow,kegg_abund_cro,kegg_abund_mig,kegg_abund_lang, kegg_abund_moor, kegg_abund_stean)
kegg_abund <- kegg_abund %>%
left_join(x=.,y=Sample_id, by=c("Sample"))
#load raw data CAZy annotations: configs based gene abundances
cazy_abund_raw<- read.csv("cazy_abund_raw.csv")
rownames(cazy_abund_raw) <- cazy_abund_raw[,1]
cazy_abund_raw<- cazy_abund_raw %>%
select(-c("Sample"))
cazy_abund_raw_t<- t(cazy_abund_raw)
cazy_abund <- cbind(Sample = rownames(cazy_abund_raw_t), cazy_abund_raw_t)
cazy_abund<- as.data.frame(cazy_abund, stringsAsFactors=FALSE)
prodigal_output_caz_abund<- read.csv("prodigal_output_caz_abund.csv")
cazy_abund<- cazy_abund %>%
left_join(x=.,y=prodigal_output_caz_abund, by=c("Sample"))
cazy_abund_gather<- cazy_abund %>%
gather(key = "cazyme", value = "reads",
AA1_1:SLH )
cazy_abund_gather$reads<- as.numeric(cazy_abund_gather$reads)
cazy_abund_gather<- cazy_abund_gather %>%
mutate(reads_norm = (reads/prodigal_output)*57474057.89)
cazy_abund_gather<- cazy_abund_gather %>%
left_join(x=.,y=Sample_id, by=c("Sample"))
### CAZy genes by substrate classes
cazy_substrate<- read.csv("cazyme_by_substrate.csv")
# create dataframe in same format
cazy_abund_substrate<- cazy_abund_gather %>%
left_join(x=., y=cazy_substrate, by=("cazyme"))
cazy_substrate_sum<- cazy_abund_substrate %>%
dplyr::group_by(Sample, Treatment, Site,Substrate) %>%
dplyr::summarise(reads_sum= sum(reads_norm, na.rm=TRUE))
## `summarise()` has grouped output by 'Sample', 'Treatment', 'Site'. You can override using the `.groups`
## argument.
cazy_substrate_sum<- cazy_substrate_sum %>%
filter(Sample %in% c( "MGr3I" , "MGr3H", "MGr3G" , "MGr2F" , "MGr2E" , "MGr2D" , "MGr1C" , "MGr1B" ,
"MGr1A" , "CRr3I" , "CRr3H" , "CRr3G" , "CRr2F" , "CRr2E" , "CRr2D" , "CRr1C" ,
"CRr1B" , "CRr1A" , "MHr3I" , "MHr3H" , "MHr3G" , "MHr2F" , "MHr2E" , "MHr2D",
"MHr1C" , "MHr1B" , "MHr1A", "BOr3I" , "BOr3H" , "BOr3G" , "BOr2F" , "BOr2E" ,
"BOr2D" , "BOr1C" , "BOr1B" , "BOr1A", "BAr3I" , "BAr3H" , "BAr3G" , "BAr2F",
"BAr2E" , "BAr2D" , "BAr1C" , "BAr1B" , "BAr1A" , "SEr3I" , "SEr3H" , "SEr3G",
"SEr2F" , "SEr2E" , "SEr2D" , "LASCr2F", "LASCr2E",
"LASCr2D", "LABRr1C" ,"LABRr1B" ,"LABRr1A" ,"LASAr3I" ,"LASAr3G","LASAr3H" ))
## Figure 4a ##
# Fermentation
kegg_abund_ferm<- kegg_abund %>%
select(
"K00163", "K00627", "K00169" , "K03737", "K00174", "K00656", "K01568" , "K14028", "K00114",
"K00163" , "K00627" , "K00169" , "K03737" , "K00174" , "K00656", "K13788" , "K01512" , "K00925",
"K00101", "K00016", "K00102" , "K03777",
"K01595", "K00024", "K01676", "K00239", "K00244", "K18209" ,"K01899" , "K01902", "K01847", "K05606" ,"K11264", "K03416",Sample
)
####
# convert to as.numeric
kegg_abund_ferm<- kegg_abund_ferm %>%
mutate(across(1:28, as.numeric))
# sum
kegg_abund_ferm_sum<- kegg_abund_ferm %>%
left_join(x=.,y=Sample_id, by=c("Sample")) %>%
mutate(ferm_sum = K00163+ K00627+ K00169 +K03737+ K00174+ K00656+ K01568 + K14028+ K00114+
K00163 + K00627 + K00169 + K03737 + K00174 + K00656+ K13788 + K01512 + K00925+
K00101+ K00016+ K00102 + K03777+
K01595+ K00024+ K01676+ K00239+ K00244+ K18209 +K01899 + K01902+ K01847+ K05606 +K11264+ K03416 ) %>%
mutate(ethanol_ferm = K00163+ K00627+ K00169 +K03737+ K00174+ K00656+ K01568 + K14028+ K00114) %>%
mutate(acetato = K00163 + K00627 + K00169 + K03737 + K00174 + K00656+ K13788 + K01512 + K00925) %>%
mutate(lactate = K00101+ K00016+ K00102 + K03777) %>%
mutate(prop_ferm = K01595+ K00024+ K01676+ K00239+ K00244+ K18209 +K01899 + K01902+ K01847+ K05606 +K11264+ K03416)
kegg_abund_ferm_sum<- kegg_abund_ferm_sum %>%
filter(Sample %in% c( "MGr3I" , "MGr3H", "MGr3G" , "MGr2F" , "MGr2E" , "MGr2D" , "MGr1C" , "MGr1B" ,
"MGr1A" , "CRr3I" , "CRr3H" , "CRr3G" , "CRr2F" , "CRr2E" , "CRr2D" , "CRr1C" ,
"CRr1B" , "CRr1A" , "MHr3I" , "MHr3H" , "MHr3G" , "MHr2F" , "MHr2E" , "MHr2D",
"MHr1C" , "MHr1B" , "MHr1A", "BOr3I" , "BOr3H" , "BOr3G" , "BOr2F" , "BOr2E" ,
"BOr2D" , "BOr1C" , "BOr1B" , "BOr1A", "BAr3I" , "BAr3H" , "BAr3G" , "BAr2F",
"BAr2E" , "BAr2D" , "BAr1C" , "BAr1B" , "BAr1A" , "SEr3I" , "SEr3H" , "SEr3G",
"SEr2F" , "SEr2E" , "SEr2D" , "LASCr2F", "LASCr2E",
"LASCr2D", "LABRr1C" ,"LABRr1B" ,"LABRr1A" ,"LASAr3I" ,"LASAr3G", "LASAr3H" ))
Figure_4a <- ggplot(kegg_abund_ferm_sum, aes(x=Treatment, y=ferm_sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=ferm_sum, x=Treatment, color=Site), alpha=0.7, width = 0.3,size=3)+
ylab(expression(atop("Fermentation", paste("(normalised reads ×1000)"))))+ ### [reported] = subscript
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000 , limits = c(44000, 71000)) +
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_4a
### Save the plot
ggsave("Figure_4a.png", dpi=1000, width = 4, height = 4, units = "in")
ferm_sum_lm <- lm(ferm_sum ~ Treatment, data = kegg_abund_ferm_sum)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(ferm_sum_lm, ~ Treatment)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
## contrast estimate SE df t.ratio p.value
## DAM - NAT -2377 1990 57 -1.195 0.4610
## DAM - REST -1540 1910 57 -0.806 0.7010
## NAT - REST 837 1990 57 0.421 0.9071
##
## P value adjustment: tukey method for comparing a family of 3 estimates
ferm_sum_lm_interaction <- lm(ferm_sum ~ Treatment * Site, data = kegg_abund_ferm_sum)
anova(ferm_sum_lm_interaction)
## Analysis of Variance Table
##
## Response: ferm_sum
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 57416417 28708208 2.7933 0.07319 .
## Site 6 952065701 158677617 15.4393 4.547e-09 ***
## Treatment:Site 11 821657669 74696152 7.2679 1.261e-06 ***
## Residuals 40 411101217 10277530
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
merged_ferm <- merge(kegg_abund_ferm_sum, env_data, by = "Sample")
site_order <- c("Balmoral","Bowness","Stean","Moors_House","Langwell","Crocach","Migneint")
merged_ferm$Site.x <- factor(merged_ferm$Site.x, levels = site_order)
site_labels <- c(
"Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House", # <- rename here
"Stean" = "Stean"
)
#correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_ferm %>%
select(ferm_sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
## ferm_sum moisture ph sphag Dim.1 oxic CN eco_index
## ferm_sum 1.0000000 0.50089483 0.37926768 0.3533784 -0.08862410 -0.53419118 -0.23203181 0.6032441
## moisture 0.5008948 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.3792677 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag 0.3533784 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 -0.0886241 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic -0.5341912 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.2320318 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index 0.6032441 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
ferm_sum_env <- lm(ferm_sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_ferm)
summary(ferm_sum_env)
##
## Call:
## lm(formula = ferm_sum ~ moisture + ph + sphag + Dim.1 + oxic +
## CN, data = merged_ferm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11802.5 -3068.3 -262.3 2717.1 10279.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11018.37 25064.31 -0.440 0.66224
## moisture 766.01 256.08 2.991 0.00441 **
## ph 4189.26 4680.14 0.895 0.37529
## sphag -24.29 64.40 -0.377 0.70775
## Dim.1 69.69 137.49 0.507 0.61462
## oxic -1279.71 464.55 -2.755 0.00833 **
## CN -107.82 127.63 -0.845 0.40250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5018 on 47 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.4492, Adjusted R-squared: 0.3788
## F-statistic: 6.387 on 6 and 47 DF, p-value: 5.715e-05
ferm_sum_env <- lm(ferm_sum ~ moisture+ oxic, data = merged_ferm)
summary(ferm_sum_env)
##
## Call:
## lm(formula = ferm_sum ~ moisture + oxic, data = merged_ferm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12853.2 -3219.8 56.5 2955.0 11335.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 196.7 18630.5 0.011 0.991614
## moisture 772.9 202.8 3.812 0.000341 ***
## oxic -1138.4 346.5 -3.285 0.001746 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5043 on 57 degrees of freedom
## Multiple R-squared: 0.3534, Adjusted R-squared: 0.3307
## F-statistic: 15.58 on 2 and 57 DF, p-value: 4.009e-06
ferm_sum_eco <- lm(ferm_sum ~ eco_index , data = merged_ferm)
summary(ferm_sum_eco)
##
## Call:
## lm(formula = ferm_sum ~ eco_index, data = merged_ferm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13315.7 -2855.9 110.9 2559.6 12587.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59313.1 692.6 85.634 < 2e-16 ***
## eco_index 5555.5 1245.3 4.461 3.8e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5365 on 58 degrees of freedom
## Multiple R-squared: 0.2555, Adjusted R-squared: 0.2426
## F-statistic: 19.9 on 1 and 58 DF, p-value: 3.804e-05
Figure_4f <- ggplot(merged_ferm, aes(x = eco_index, y = ferm_sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(expression(atop("Fermentation", paste("(normalised reads ×1000)"))))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_4f
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_4f.png", dpi=1000, width = 6, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
merged_ferm <- merged_ferm %>% filter(Site.x != "Stean")
ggplot(merged_ferm, aes(x = eco_index, y = ferm_sum)) +
geom_point(color = "black", size = 4, alpha = 0.7,
aes(shape = as.character(Treatment.x),
fill = as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE,
linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values = c(
"#e41a1c", "#377eb8", "#4daf4a",
"#984ea3", "#ff7f00", "#ffff33", "#a65628")) +
scale_shape_manual(name = "Treatment", values = c(21, 22, 23),
limits = c("NAT", "REST", "DAM"),
labels = c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored")) +
labs(y = "Fermentation") +
scale_y_continuous(labels = function(x) x / 1000) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14) +
theme(
axis.text = element_text(size = 13, colour = "black"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
) +
facet_grid(. ~ Site.x, scales = "free", labeller = as_labeller(site_labels))
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_S9a.png", dpi=1000, width = 8, height = 3, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
kegg_abund_ferm_sum <- merge(kegg_abund_ferm_sum,env_data,by="Sample")
lm_eco <- lm(ferm_sum ~ eco_index, data = kegg_abund_ferm_sum)
eco_summary <- summary(lm_eco)
# Extract model fit stats
r_squared <- round(eco_summary$r.squared, 3)
adj_r_squared <- round(eco_summary$adj.r.squared, 3)
f_stat <- eco_summary$fstatistic
model_p <- round(pf(f_stat[1], f_stat[2], f_stat[3], lower.tail = FALSE), 4)
# Coefficients table
eco_df <- as.data.frame(eco_summary$coefficients)
eco_df$Term <- rownames(eco_df)
# Add model stats as additional rows
stats_df <- data.frame(
Estimate = NA,
`Std. Error` = NA,
`t value` = NA,
`Pr(>|t|)` = NA,
Term = c(
paste0("R² = ", r_squared),
paste0("Adjusted R² = ", adj_r_squared),
paste0("Model p-value = ", model_p)
)
)
# Combine the coefficient table and model statistics
names(stats_df) <- names(eco_df)
eco_results <- rbind(eco_df, stats_df)
# Coefficients table
eco_df <- as.data.frame(eco_summary$coefficients)
eco_df$Term <- rownames(eco_df)
rownames(eco_df) <- NULL
# Reorder columns
eco_df <- eco_df[, c("Estimate", "Std. Error", "t value", "Pr(>|t|)", "Term")]
# Model stats as rows (ensure same column names and order)
stats_df <- data.frame(
Estimate = NA,
`Std. Error` = NA,
`t value` = NA,
`Pr(>|t|)` = NA,
Term = c(
paste0("R² = ", r_squared),
paste0("Adjusted R² = ", adj_r_squared),
paste0("Model p-value = ", model_p)
),
check.names = FALSE
)
# Bind rows
eco_results <- rbind(eco_df, stats_df)
# Write to CSV
write.csv(eco_results, file = "eco_index_model_results.csv", row.names = FALSE)
# Write to CSV
write.csv(eco_results, file = "fer_eco_index_results.csv", row.names = FALSE)
# Stats
model <- lm(log(Growth) ~ ferm_sum, data = merged_ferm)
summary(model)
##
## Call:
## lm(formula = log(Growth) ~ ferm_sum, data = merged_ferm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.57879 -0.78089 0.03437 0.79225 2.30830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.456e+00 1.391e+00 3.923 0.000258 ***
## ferm_sum -9.263e-05 2.341e-05 -3.956 0.000232 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.085 on 52 degrees of freedom
## Multiple R-squared: 0.2314, Adjusted R-squared: 0.2166
## F-statistic: 15.65 on 1 and 52 DF, p-value: 0.0002318
#Plot ferm vs growth
ggplot(merged_ferm, aes(y = Growth, x = ferm_sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
xlab(expression(atop("Fermentation", paste("(normalised reads ×1000)"))))+
ylab(expression("Microbial growth rate (ng g"^{-1}*"h"^{-1}*")"))+
scale_x_continuous(labels = function(x) x / 1000) +
scale_y_continuous(trans='log2',labels = label_number(accuracy = 0.1), limits = c(0.05, 11))+
theme(
axis.text = element_text(size = 13, colour = "black"),
legend.position = "None"
) +
theme_light(base_size = 14) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") )
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_S10c.png", dpi=1000, width = 7, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
cazy_substrate_Lignin <- cazy_substrate_sum %>%
filter(Substrate == "Lignin")
Figure_5a <- ggplot(cazy_substrate_Lignin, aes(x=Treatment, y=reads_sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=reads_sum, x=Treatment, color=Site), alpha=0.7, width = 0.3,size=3)+
ylab(expression(atop("Lignin", paste("(normalised reads ×1000)"))))+
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000 , limits = c(10500, 17500)) +
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_5a
### Save the plot
ggsave("Figure_5a.png", dpi=1000, width = 4, height = 4, units = "in")
Lignin_lm <- lm(reads_sum ~ Treatment, data = cazy_substrate_Lignin)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(Lignin_lm, ~ Treatment)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
## contrast estimate SE df t.ratio p.value
## DAM - NAT 380.7 510 57 0.746 0.7373
## DAM - REST 447.2 490 57 0.912 0.6350
## NAT - REST 66.5 510 57 0.130 0.9907
##
## P value adjustment: tukey method for comparing a family of 3 estimates
Lignin_lm_interaction <- lm(reads_sum ~ Treatment * Site, data = cazy_substrate_Lignin)
anova(Lignin_lm_interaction)
## Analysis of Variance Table
##
## Response: reads_sum
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 2411269 1205635 0.8337 0.4418446
## Site 6 49312564 8218761 5.6833 0.0002413 ***
## Treatment:Site 11 36764962 3342269 2.3112 0.0263885 *
## Residuals 40 57844644 1446116
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
merged_Lignin <- merge(cazy_substrate_Lignin, env_data, by = "Sample") %>%
filter(Sample != "SEr2E")
# correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_Lignin %>%
select(reads_sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
## reads_sum moisture ph sphag Dim.1 oxic CN eco_index
## reads_sum 1.00000000 -0.18249263 -0.08710120 0.0855374 0.29667510 -0.14826857 -0.14819168 -0.03851853
## moisture -0.18249263 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.67901038
## ph -0.08710120 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.63207308
## sphag 0.08553740 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.69801540
## Dim.1 0.29667510 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.41333962
## oxic -0.14826857 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.72622506
## CN -0.14819168 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.40595784
## eco_index -0.03851853 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.00000000
# Optional: Visualize correlations with a heatmap
ggcorrplot(cor_matrix, lab = TRUE, colors = c("blue", "white", "red"))
Lignin_lm_env <- lm(reads_sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_Lignin)
summary(Lignin_lm_env)
##
## Call:
## lm(formula = reads_sum ~ moisture + ph + sphag + Dim.1 + oxic +
## CN, data = merged_Lignin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3447.0 -1133.2 -252.1 1142.6 2560.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22270.151 7965.505 2.796 0.00747 **
## moisture -89.960 81.384 -1.105 0.27462
## ph 67.694 1487.361 0.046 0.96389
## sphag 16.785 20.468 0.820 0.41631
## Dim.1 70.386 43.694 1.611 0.11390
## oxic -103.479 147.635 -0.701 0.48682
## CN -1.673 40.561 -0.041 0.96728
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1595 on 47 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.1376, Adjusted R-squared: 0.0275
## F-statistic: 1.25 on 6 and 47 DF, p-value: 0.2987
Lignin_lm_env <- lm(reads_sum ~ Dim.1, data = merged_Lignin)
summary(Lignin_lm_env)
##
## Call:
## lm(formula = reads_sum ~ Dim.1, data = merged_Lignin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3921.4 -1251.5 -256.9 1288.3 2715.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13991.68 201.94 69.287 <2e-16 ***
## Dim.1 78.34 36.68 2.136 0.037 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1540 on 57 degrees of freedom
## Multiple R-squared: 0.0741, Adjusted R-squared: 0.05786
## F-statistic: 4.562 on 1 and 57 DF, p-value: 0.037
Lignin_lm_eco <- lm(reads_sum ~ eco_index, data = merged_Lignin)
summary(Lignin_lm_eco)
##
## Call:
## lm(formula = reads_sum ~ eco_index, data = merged_Lignin)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3499.5 -1193.1 -231.4 1298.2 3058.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14042.11 208.46 67.361 <2e-16 ***
## eco_index -10.91 373.83 -0.029 0.977
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1601 on 57 degrees of freedom
## Multiple R-squared: 1.493e-05, Adjusted R-squared: -0.01753
## F-statistic: 0.000851 on 1 and 57 DF, p-value: 0.9768
Figure_5f <- ggplot(merged_Lignin, aes(x = eco_index, y = reads_sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(expression(atop("Lignin", paste("(normalised reads ×1000)"))))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_5f
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_5f.png", dpi=1000, width = 6, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
cazy_substrate_Cellulose <- cazy_substrate_sum %>%
filter(Substrate == "Cellulose")
Figure_5b <- ggplot(cazy_substrate_Cellulose, aes(x=Treatment, y=reads_sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=reads_sum, x=Treatment, color=Site), alpha=0.7, width = 0.3,size=3)+
ylab(expression(atop("Cellulose", paste("(normalised reads ×1000)"))))+ ### [reported] = subscript
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000,
limits = c(32000, 56000),
breaks = seq(35000, 55000, by = 5000)) + # Adjust breaks and limits
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_5b
### Save the plot
ggsave("Figure_5b.png", dpi=1000, width = 4, height = 4, units = "in")
Cellulose_lm <- lm(reads_sum ~ Treatment, data = cazy_substrate_Cellulose)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(Cellulose_lm, ~ Treatment)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
## contrast estimate SE df t.ratio p.value
## DAM - NAT -1090 1690 57 -0.646 0.7952
## DAM - REST -133 1620 57 -0.082 0.9963
## NAT - REST 957 1690 57 0.567 0.8379
##
## P value adjustment: tukey method for comparing a family of 3 estimates
Cellulose_lm_interaction <- lm(reads_sum ~ Treatment * Site, data = cazy_substrate_Cellulose)
anova(Cellulose_lm_interaction)
## Analysis of Variance Table
##
## Response: reads_sum
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 13389900 6694950 0.5394 0.58727
## Site 6 806994676 134499113 10.8367 3.875e-07 ***
## Treatment:Site 11 268473848 24406713 1.9665 0.05898 .
## Residuals 40 496459105 12411478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
merged_Cellulose <- merge(cazy_substrate_Cellulose, env_data, by = "Sample") %>%
filter(Sample != "SEr2E")
# correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_Cellulose %>%
select(reads_sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
## reads_sum moisture ph sphag Dim.1 oxic CN eco_index
## reads_sum 1.0000000 0.25638391 0.10376729 0.3658205 0.11744706 -0.52517148 -0.20300711 0.4023317
## moisture 0.2563839 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.1037673 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag 0.3658205 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 0.1174471 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic -0.5251715 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.2030071 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index 0.4023317 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
# Optional: Visualize correlations with a heatmap
ggcorrplot(cor_matrix, lab = TRUE, colors = c("blue", "white", "red"))
Cellulose_lm_env <- lm(reads_sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_Cellulose)
summary(Cellulose_lm_env)
##
## Call:
## lm(formula = reads_sum ~ moisture + ph + sphag + Dim.1 + oxic +
## CN, data = merged_Cellulose)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11360.0 -2014.6 -748.1 3197.9 9605.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36116.60 21847.49 1.653 0.10497
## moisture 286.13 223.22 1.282 0.20619
## ph -2564.96 4079.48 -0.629 0.53256
## sphag 39.86 56.14 0.710 0.48123
## Dim.1 149.15 119.84 1.245 0.21948
## oxic -1222.78 404.93 -3.020 0.00408 **
## CN 13.49 111.25 0.121 0.90402
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4374 on 47 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.3341, Adjusted R-squared: 0.2491
## F-statistic: 3.93 on 6 and 47 DF, p-value: 0.002902
Cellulose_lm_env <- lm(reads_sum ~ Dim.1, data = merged_Cellulose)
summary(Cellulose_lm_env)
##
## Call:
## lm(formula = reads_sum ~ Dim.1, data = merged_Cellulose)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10535.5 -3751.8 -767.4 4271.5 11901.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43352.83 664.17 65.273 <2e-16 ***
## Dim.1 68.21 120.64 0.565 0.574
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5066 on 57 degrees of freedom
## Multiple R-squared: 0.005576, Adjusted R-squared: -0.01187
## F-statistic: 0.3196 on 1 and 57 DF, p-value: 0.5741
Cellulose_lm_eco <- lm(reads_sum ~ eco_index, data = merged_Cellulose)
summary(Cellulose_lm_eco)
##
## Call:
## lm(formula = reads_sum ~ eco_index, data = merged_Cellulose)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13048 -3352 -742 4329 11488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43431.3 626.2 69.353 <2e-16 ***
## eco_index 2889.0 1123.0 2.572 0.0127 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4809 on 57 degrees of freedom
## Multiple R-squared: 0.104, Adjusted R-squared: 0.0883
## F-statistic: 6.618 on 1 and 57 DF, p-value: 0.01273
Figure_5g <- ggplot(merged_Cellulose, aes(x = eco_index, y = reads_sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(expression(atop("Cellulose", paste("(normalised reads ×1000)"))))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_5g
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_5g.png", dpi=1000, width = 6, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
cazy_substrate_Oligosaccharides <- cazy_substrate_sum %>%
filter(Substrate == "Oligosaccharides" )
Figure_5c <- ggplot(cazy_substrate_Oligosaccharides, aes(x=Treatment, y=reads_sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=reads_sum, x=Treatment, color=Site), alpha=0.7, width = 0.3,size=3)+
ylab(expression(atop("Oligosaccharides", paste("(normalised reads ×1000)"))))+ ### [reported] = subscript
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000, limits = c(2000, 13500)) +
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_5c
### Save the plot
ggsave("Figure_5c.png", dpi=1000, width = 4, height = 4, units = "in")
Oligosaccharides_lm <- lm(reads_sum ~ Treatment, data = cazy_substrate_Oligosaccharides)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(Oligosaccharides_lm, ~ Treatment)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
## contrast estimate SE df t.ratio p.value
## DAM - NAT -1183 932 57 -1.270 0.4178
## DAM - REST -200 895 57 -0.223 0.9730
## NAT - REST 984 932 57 1.056 0.5452
##
## P value adjustment: tukey method for comparing a family of 3 estimates
Oligosaccharides_lm_interaction <- lm(reads_sum ~ Treatment * Site, data = cazy_substrate_Oligosaccharides)
anova(Oligosaccharides_lm_interaction)
## Analysis of Variance Table
##
## Response: reads_sum
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 15204806 7602403 3.1373 0.05424 .
## Site 6 312841832 52140305 21.5167 4.172e-11 ***
## Treatment:Site 11 69657850 6332532 2.6132 0.01304 *
## Residuals 40 96929993 2423250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
merged_Oligosaccharides <- merge(cazy_substrate_Oligosaccharides, env_data, by = "Sample") %>%
filter(Sample != "SEr2E")
#correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_Oligosaccharides %>%
select(reads_sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
## reads_sum moisture ph sphag Dim.1 oxic CN eco_index
## reads_sum 1.00000000 0.48171607 0.25894508 0.5273517 0.04112217 -0.42191699 -0.09427875 0.5001017
## moisture 0.48171607 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.25894508 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag 0.52735171 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 0.04112217 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic -0.42191699 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.09427875 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index 0.50010170 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
ggcorrplot(cor_matrix, lab = TRUE, colors = c("blue", "white", "red"))
Oligosaccharides_lm_env <- lm(reads_sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_Oligosaccharides)
summary(Oligosaccharides_lm_env)
##
## Call:
## lm(formula = reads_sum ~ moisture + ph + sphag + Dim.1 + oxic +
## CN, data = merged_Oligosaccharides)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3431.8 -1407.8 -579.5 1032.0 5308.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -22765.59 11044.26 -2.061 0.0448 *
## moisture 290.60 112.84 2.575 0.0132 *
## ph 900.34 2062.24 0.437 0.6644
## sphag 65.58 28.38 2.311 0.0253 *
## Dim.1 118.29 60.58 1.952 0.0568 .
## oxic -268.58 204.70 -1.312 0.1959
## CN 32.61 56.24 0.580 0.5648
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2211 on 47 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.4324, Adjusted R-squared: 0.36
## F-statistic: 5.968 on 6 and 47 DF, p-value: 0.0001077
Oligosaccharides_lm_env <- lm(reads_sum ~ moisture+ sphag+ Dim.1, data = merged_Oligosaccharides)
summary(Oligosaccharides_lm_env)
##
## Call:
## lm(formula = reads_sum ~ moisture + sphag + Dim.1, data = merged_Oligosaccharides)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3964.5 -1549.7 -369.6 1245.3 5414.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -23774.98 9178.78 -2.590 0.01253 *
## moisture 326.60 105.62 3.092 0.00324 **
## sphag 79.92 25.18 3.174 0.00258 **
## Dim.1 114.78 56.89 2.018 0.04901 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2195 on 50 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.4047, Adjusted R-squared: 0.369
## F-statistic: 11.33 on 3 and 50 DF, p-value: 8.743e-06
Oligosaccharides_lm_eco <- lm(reads_sum ~ eco_index, data = merged_Oligosaccharides)
summary(Oligosaccharides_lm_eco)
##
## Call:
## lm(formula = reads_sum ~ eco_index, data = merged_Oligosaccharides)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4586.3 -1966.4 -485.8 1621.3 7441.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6552.6 352.2 18.604 < 2e-16 ***
## eco_index 1808.9 631.6 2.864 0.00585 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2705 on 57 degrees of freedom
## Multiple R-squared: 0.1258, Adjusted R-squared: 0.1105
## F-statistic: 8.202 on 1 and 57 DF, p-value: 0.005848
Figure_5h <- ggplot(merged_Oligosaccharides, aes(x = eco_index, y = reads_sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(expression(atop("Oligosaccharides", paste("(normalised reads ×1000)"))))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_5h
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_5h.png", dpi=1000, width = 6, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
ko_levels <- read.csv("KO_levels.csv")
kegg_levels <- kegg_abund_gather %>%
left_join(x=.,y=ko_levels, by=c("KO"))
## Warning in left_join(x = ., y = ko_levels, by = c("KO")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 15 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
kegg_levels_aromatic <- kegg_levels %>%
filter(Category3 == "01220_Degradation_of_aromatic_compounds_[PATH:ko01220]")
#00190_Oxidative_phosphorylation_[PATH:ko00190]
kegg_levels_aromatic <- kegg_levels_aromatic %>%
dplyr::group_by(Sample, Site, Treatment) %>%
dplyr::summarise(sum = sum(reads_norm))
## `summarise()` has grouped output by 'Sample', 'Site'. You can override using the `.groups` argument.
Figure_5d <- ggplot(kegg_levels_aromatic, aes(x=Treatment, y=sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=sum, x=Treatment, color=Site), alpha=0.7, width = 0.3,size=3)+
ylab(expression(atop("Aromatic compounds", paste("(normalised reads ×1000)"))))+ ### [reported] = subscript
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000, limits = c(18000, 52000)) +
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_5d
### Save the plot
ggsave("Figure_5d.png", dpi=1000, width = 4, height = 4, units = "in")
aromatic_lm <- lm(sum ~ Treatment, data = kegg_levels_aromatic)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(aromatic_lm, ~ Treatment)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
## contrast estimate SE df t.ratio p.value
## DAM - NAT 3487 2370 63 1.469 0.3124
## DAM - REST 845 2150 63 0.393 0.9184
## NAT - REST -2642 2250 63 -1.175 0.4723
##
## P value adjustment: tukey method for comparing a family of 3 estimates
aromatic_lm_interaction <- lm(sum ~ Treatment * Site, data = kegg_levels_aromatic)
anova(aromatic_lm_interaction)
## Analysis of Variance Table
##
## Response: sum
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 127163414 63581707 3.0308 0.058015 .
## Site 6 1787694877 297949146 14.2024 4.483e-09 ***
## Treatment:Site 11 685641406 62331037 2.9711 0.004679 **
## Residuals 46 965023737 20978777
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
merged_aromatic <- merge(kegg_levels_aromatic, env_data, by = "Sample") %>%
filter(Sample != "SEr2E")
#correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_aromatic %>%
select(sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
## sum moisture ph sphag Dim.1 oxic CN eco_index
## sum 1.00000000 -0.39214403 -0.22426292 -0.4614426 0.01311506 0.21019512 -0.05700873 -0.3454815
## moisture -0.39214403 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph -0.22426292 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag -0.46144256 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 0.01311506 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic 0.21019512 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.05700873 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index -0.34548155 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
# Optional: Visualize correlations with a heatmap
ggcorrplot(cor_matrix, lab = TRUE, colors = c("blue", "white", "red"))
aromatic_lm_env <- lm(sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_aromatic)
summary(aromatic_lm_env)
##
## Call:
## lm(formula = sum ~ moisture + ph + sphag + Dim.1 + oxic + CN,
## data = merged_aromatic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13163.9 -4603.5 -209.5 3477.1 14205.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 108902.97 33353.64 3.265 0.00205 **
## moisture -526.37 340.78 -1.545 0.12914
## ph -4466.89 6227.97 -0.717 0.47678
## sphag -227.59 85.70 -2.656 0.01078 *
## Dim.1 -258.91 182.96 -1.415 0.16363
## oxic -77.31 618.19 -0.125 0.90102
## CN -199.37 169.84 -1.174 0.24635
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6677 on 47 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.3037, Adjusted R-squared: 0.2148
## F-statistic: 3.416 on 6 and 47 DF, p-value: 0.007022
aromatic_lm_env <- lm(sum ~ Dim.1, data = merged_aromatic)
summary(aromatic_lm_env)
##
## Call:
## lm(formula = sum ~ Dim.1, data = merged_aromatic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16195 -5362 -516 3516 16770
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34732.07 989.43 35.103 <2e-16 ***
## Dim.1 29.71 179.72 0.165 0.869
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7548 on 57 degrees of freedom
## Multiple R-squared: 0.0004793, Adjusted R-squared: -0.01706
## F-statistic: 0.02733 on 1 and 57 DF, p-value: 0.8693
aromatic_lm_eco <- lm(sum ~ eco_index, data = merged_aromatic)
summary(aromatic_lm_eco)
##
## Call:
## lm(formula = sum ~ eco_index, data = merged_aromatic)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13795 -4558 -1100 3884 17120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34707 944 36.765 <2e-16 ***
## eco_index -3714 1693 -2.194 0.0324 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7250 on 57 degrees of freedom
## Multiple R-squared: 0.07785, Adjusted R-squared: 0.06167
## F-statistic: 4.812 on 1 and 57 DF, p-value: 0.03235
Figure_5i <- ggplot(merged_aromatic, aes(x = eco_index, y = sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(expression(atop("Aromatic compounds", paste("(normalised reads ×1000)"))))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_5i
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_5i.png", dpi=1000, width = 6, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
cazy_substrate_Cell_Wall<- cazy_substrate_sum %>%
filter(Substrate == "Cell Wall" )
Figure_5e <- ggplot(cazy_substrate_Cell_Wall, aes(x=Treatment, y=reads_sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=reads_sum, x=Treatment, color=Site), alpha=0.7, width = 0.3,size=3)+
ylab(expression(atop("Microbial cell wall", paste("(normalised reads ×1000)"))))+ ### [reported] = subscript
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000, limits = c(1600, 5400)) +
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_5e
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_boxplot()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).
### Save the plot
ggsave("Figure_5e.png", dpi=1000, width = 4, height = 4, units = "in")
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_boxplot()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_point()`).
cell_wall_lm <- lm(reads_sum ~ Treatment, data = cazy_substrate_Cell_Wall)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(cell_wall_lm, ~ Treatment)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
## contrast estimate SE df t.ratio p.value
## DAM - NAT 693 276 57 2.506 0.0395
## DAM - REST 594 266 57 2.236 0.0738
## NAT - REST -99 276 57 -0.358 0.9319
##
## P value adjustment: tukey method for comparing a family of 3 estimates
cell_wall_lm_interaction <- lm(reads_sum ~ Treatment * Site, data = cazy_substrate_Cell_Wall)
anova(cell_wall_lm_interaction)
## Analysis of Variance Table
##
## Response: reads_sum
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 5674269 2837134 5.8318 0.005991 **
## Site 6 10337616 1722936 3.5415 0.006615 **
## Treatment:Site 11 12408174 1128016 2.3186 0.025933 *
## Residuals 40 19459892 486497
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
merged_cell_wall <- merge(cazy_substrate_Cell_Wall, env_data, by = "Sample") %>%
filter(Sample != "SEr2E")
#correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_cell_wall %>%
select(reads_sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
## reads_sum moisture ph sphag Dim.1 oxic CN eco_index
## reads_sum 1.00000000 -0.01544928 0.09355379 -0.2145069 0.18157696 0.02612597 0.13501816 -0.1294993
## moisture -0.01544928 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.09355379 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag -0.21450688 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 0.18157696 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic 0.02612597 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN 0.13501816 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index -0.12949930 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
# Optional: Visualize correlations with a heatmap
#library(ggcorrplot)
ggcorrplot(cor_matrix, lab = TRUE, colors = c("blue", "white", "red"))
cell_wall_lm_env <- lm(reads_sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_cell_wall)
summary(cell_wall_lm_env)
##
## Call:
## lm(formula = reads_sum ~ moisture + ph + sphag + Dim.1 + oxic +
## CN, data = merged_cell_wall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1578.18 -346.95 -13.14 264.25 1458.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1740.281 3583.197 -0.486 0.6295
## moisture 17.247 36.610 0.471 0.6397
## ph 836.766 669.073 1.251 0.2173
## sphag -14.764 9.207 -1.604 0.1155
## Dim.1 35.872 19.655 1.825 0.0743 .
## oxic -35.488 66.412 -0.534 0.5956
## CN 21.455 18.246 1.176 0.2456
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 717.3 on 47 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.1474, Adjusted R-squared: 0.03857
## F-statistic: 1.354 on 6 and 47 DF, p-value: 0.2527
cell_wall_lm_env <- lm(reads_sum ~ Dim.1, data = merged_cell_wall)
summary(cell_wall_lm_env)
##
## Call:
## lm(formula = reads_sum ~ Dim.1, data = merged_cell_wall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1515.4 -490.7 -178.9 383.8 3936.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3272.08 119.67 27.342 <2e-16 ***
## Dim.1 14.62 21.74 0.673 0.504
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 912.9 on 57 degrees of freedom
## Multiple R-squared: 0.007873, Adjusted R-squared: -0.009533
## F-statistic: 0.4523 on 1 and 57 DF, p-value: 0.5039
cell_wall_lm_eco <- lm(reads_sum ~ eco_index, data = merged_cell_wall)
summary(cell_wall_lm_eco)
##
## Call:
## lm(formula = reads_sum ~ eco_index, data = merged_cell_wall)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1431.0 -532.0 -132.1 440.4 3583.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3276.9 115.9 28.27 <2e-16 ***
## eco_index -384.6 207.9 -1.85 0.0695 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 890.1 on 57 degrees of freedom
## Multiple R-squared: 0.05666, Adjusted R-squared: 0.04011
## F-statistic: 3.423 on 1 and 57 DF, p-value: 0.06947
Figure_5j <- ggplot(merged_cell_wall, aes(x = eco_index, y = reads_sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(expression(atop("Microbial cell wall", paste("(normalised reads ×1000)"))))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_5j
## `geom_smooth()` using formula = 'y ~ x'
### Save the plot
ggsave("Figure_5j.png", dpi=1000, width = 6, height = 4.5, units = "in")
## `geom_smooth()` using formula = 'y ~ x'
# Define your list of KEGG pathways and labels
pathways <- list(
list("01220_Degradation_of_aromatic_compounds_[PATH:ko01220]", "Degradation of Aromatic Compounds", "Aromatic compounds", "Aromatic"),
list("00680_Methane_metabolism_[PATH:ko00680]", "Methane metabolism", "Methane metabolism", "Methane"),
list("00720_Carbon_fixation_pathways_in_prokaryotes_[PATH:ko00720]", "Carbon fixation pathways in prokaryotes", "Carbon fixation in prokaryotes", "CarbonFixation"),
list("00910_Nitrogen_metabolism_[PATH:ko00910]", "Nitrogen metabolism", "Nitrogen metabolism", "Nitrogen"),
list("00920_Sulfur_metabolism_[PATH:ko00920]", "Sulfur metabolism", "Sulfur metabolism", "Sulfur")
)
# Loop through each pathway
for (pathway in pathways) {
kegg_id <- pathway[[1]]
title <- pathway[[2]]
x_axis_label <- pathway[[3]]
filename_prefix <- pathway[[4]]
# Filter and summarise
df_pathway <- kegg_levels %>%
filter(Category3 == kegg_id) %>%
group_by(Sample, Site, Treatment) %>%
summarise(sum = sum(reads_norm), .groups = "drop") %>%
filter(Sample %in% c("MGr3I", "MGr3H", "MGr3G", "MGr2F", "MGr2E", "MGr2D", "MGr1C", "MGr1B",
"MGr1A", "CRr3I", "CRr3H", "CRr3G", "CRr2F", "CRr2E", "CRr2D", "CRr1C",
"CRr1B", "CRr1A", "MHr3I", "MHr3H", "MHr3G", "MHr2F", "MHr2E", "MHr2D",
"MHr1C", "MHr1B", "MHr1A", "BOr3I", "BOr3H", "BOr3G", "BOr2F", "BOr2E",
"BOr2D", "BOr1C", "BOr1B", "BOr1A", "BAr3I", "BAr3H", "BAr3G", "BAr2F",
"BAr2E", "BAr2D", "BAr1C", "BAr1B", "BAr1A",
"LASCr2F", "LASCr2E", "LASCr2D",
"LABRr1C", "LABRr1B", "LABRr1A", "LASAr3I", "LASAr3G", "LASAr3H"))
# Merge with environmental data
merged_pathway <- merge(df_pathway, env_data, by = "Sample")
# ---- Linear model ----
model <- lm(log(Growth) ~ sum, data = merged_pathway)
cat("\nPathway:", title, "\n")
print(summary(model))
# Reorder Site.x
site_order <- c("Balmoral", "Bowness", "Moors_House", "Langwell", "Crocach", "Migneint")
merged_pathway$Site.x <- factor(merged_pathway$Site.x, levels = (site_order))
Figure_5_substrate_plot1 <- ggplot(merged_pathway, aes(x=Treatment.x, y=sum))+
geom_boxplot(lwd=1,outlier.shape= NA)+
geom_jitter(aes(y=sum, x=Treatment.x, color=Site.x), alpha=0.7, width = 0.3,size=3)+
ylab(bquote(atop(.(title), "(normalised reads ×1000)")))+
xlab(" ")+
theme_light(base_size = 14)+
theme(legend.position = "None")+
scale_colour_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_x_discrete(limits=c("DAM", "REST","NAT"),labels=c("DAM" = "Degraded", "NAT" = "Natural",
"REST" = "Restored"))+
scale_y_continuous(labels = function(x) x / 1000 , limits = c(floor(min(merged_pathway$sum) / 1000) * 1000, ceiling(max(merged_pathway$sum) / 1000) * 1000)) +
theme(legend.text=element_text(size=16))+
theme(legend.position = "None")+
theme(axis.text=element_text(size=13,colour="black"))
Figure_5_substrate_plot1
ggsave(paste0("Figure_5_",filename_prefix, "_plot1.png"), dpi=1000, width = 4, height = 4, units = "in")
# Figure 5f ##
substrate_lm <- lm(sum ~ Treatment.x, data = merged_pathway)
# Perform pairwise comparisons for the Treatment variable
emmeans_results <- emmeans(substrate_lm, ~ Treatment.x)
# Perform pairwise comparisons for DAM vs. REST, REST vs. NAT, DAM vs. NAT
pairwise_comparisons <- pairs(emmeans_results, adjust = "tukey")
# Print the pairwise comparisons
summary(pairwise_comparisons)
substrate_lm_interaction <- lm(sum ~ Treatment.x * Site.x, data = merged_pathway)
anova(substrate_lm_interaction)
# correaltion
# Select relevant columns (numeric factors)
cor_data <- merged_pathway %>%
select(sum, moisture, ph, sphag, Dim.1, oxic, CN, eco_index)
# Compute Pearson correlation matrix
cor_matrix <- cor(cor_data, use = "complete.obs", method = "pearson")
# Print correlation matrix
print(cor_matrix)
# Optional: Visualize correlations with a heatmap
ggcorrplot(cor_matrix, lab = TRUE, colors = c("blue", "white", "red"))
substrate_lm_env <- lm(sum ~ moisture+ ph+ sphag+ Dim.1+ oxic+ CN, data = merged_pathway)
summary(substrate_lm_env)
substrate_lm_env <- lm(sum ~ Dim.1, data = merged_pathway)
summary(substrate_lm_env)
substrate_lm_eco <- lm(sum ~ eco_index, data = merged_pathway)
summary(substrate_lm_eco)
Figure_5_substrate_plot2 <- ggplot(merged_pathway, aes(x = eco_index, y = sum)) +
geom_point(color = "black",size = 4, alpha = 0.7,aes(shape=as.character(Treatment.x),
fill=as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) + # Regression line with confidence interval
scale_fill_manual(values=c( "#e41a1c",
"#377eb8",
"#4daf4a",
"#984ea3",
"#ff7f00",
"#ffff33",
"#a65628"),
labels = c("Balmoral" = "Balmoral",
"Bowness" = "Bowness",
"Crocach" = "Crocach",
"Langwell" = "Langwell",
"Migneint" = "Migneint",
"Moors_House" = "Moor House",
"Stean" = "Stean" )) +
scale_shape_manual(name="Treatment", values=c(21, 22, 23), limits=c("NAT", "REST", "DAM"),
labels=c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored"))+
ylab(bquote(atop(.(title), "(normalised reads ×1000)")))+
scale_y_continuous(labels = function(x) x / 1000) +
guides(
fill = guide_legend(order = 1, title = "Site", override.aes = list(shape = 21, color = "black")),
shape = guide_legend(order = 2, title = "Treatment") ) +
labs(x = "Ecosystem health index") +
theme_light(base_size = 14)+
theme(axis.text = element_text(size = 13, colour = "black"))
Figure_5_substrate_plot2
ggsave(paste0("Figure_5_",filename_prefix, "_plot2.png"), dpi=1000, width = 6, height = 4, units = "in")
# Correlation with ecosystem health index
p2 <- ggplot(merged_pathway, aes(x = sum, y = Growth)) +
geom_point(color = "black", size = 4, alpha = 0.7, aes(shape = as.character(Treatment.x), fill = as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_y_continuous(trans='log2',labels = label_number(accuracy = 0.1), limits = c(0.05, 11))+
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33", "#a65628")) +
scale_shape_manual(name = "Treatment", values = c(21, 22, 23), limits = c("NAT", "REST", "DAM"),
labels = c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored")) +
xlab(paste0(x_axis_label)) +
scale_x_continuous(labels = function(x) x / 1000) +
ylab(expression("Microbial growth rate (ng g"^{-1}*"h"^{-1}*")"))+
guides(fill = "none", shape = "none", color = "none") +
theme_light(base_size = 14) +
theme(
axis.text = element_text(size = 13, colour = "black"),
legend.position = "none"
) +
facet_grid(. ~ Site.x, scales = "free")
ggsave(paste0(filename_prefix, "_growth_facet_site.png"), plot = p2, dpi = 1000, width = 8, height = 3, units = "in")
}
##
## Pathway: Degradation of Aromatic Compounds
##
## Call:
## lm(formula = log(Growth) ~ sum, data = merged_pathway)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4780 -0.5584 -0.1146 0.6868 2.4431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.581e+00 7.814e-01 -2.023 0.0482 *
## sum 4.445e-05 2.171e-05 2.048 0.0457 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.191 on 52 degrees of freedom
## Multiple R-squared: 0.07462, Adjusted R-squared: 0.05682
## F-statistic: 4.193 on 1 and 52 DF, p-value: 0.04566
## sum moisture ph sphag Dim.1 oxic CN eco_index
## sum 1.00000000 -0.39214403 -0.22426292 -0.4614426 0.01311506 0.21019512 -0.05700873 -0.3454815
## moisture -0.39214403 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph -0.22426292 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag -0.46144256 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 0.01311506 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic 0.21019512 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.05700873 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index -0.34548155 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_smooth()`).
##
## Pathway: Methane metabolism
##
## Call:
## lm(formula = log(Growth) ~ sum, data = merged_pathway)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.58408 -0.67853 -0.05391 0.83400 1.92436
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.888e+00 7.703e-01 3.749 0.000447 ***
## sum -3.930e-05 1.023e-05 -3.842 0.000334 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.092 on 52 degrees of freedom
## Multiple R-squared: 0.2211, Adjusted R-squared: 0.2061
## F-statistic: 14.76 on 1 and 52 DF, p-value: 0.0003335
## sum moisture ph sphag Dim.1 oxic CN eco_index
## sum 1.00000000 0.42943009 0.38127331 0.4686206 -0.09084801 -0.43145611 -0.17369506 0.5576106
## moisture 0.42943009 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.38127331 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag 0.46862064 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 -0.09084801 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic -0.43145611 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.17369506 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index 0.55761061 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##
## Pathway: Carbon fixation pathways in prokaryotes
##
## Call:
## lm(formula = log(Growth) ~ sum, data = merged_pathway)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.57388 -0.68560 -0.00104 0.86503 1.74947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.746e+00 1.120e+00 4.239 9.21e-05 ***
## sum -4.833e-05 1.127e-05 -4.289 7.80e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.064 on 52 degrees of freedom
## Multiple R-squared: 0.2614, Adjusted R-squared: 0.2471
## F-statistic: 18.4 on 1 and 52 DF, p-value: 7.802e-05
## sum moisture ph sphag Dim.1 oxic CN eco_index
## sum 1.00000000 0.50157384 0.38361361 0.3762242 -0.03557673 -0.45874643 -0.23539239 0.5712193
## moisture 0.50157384 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.38361361 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag 0.37622415 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 -0.03557673 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic -0.45874643 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.23539239 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index 0.57121929 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 3 rows containing missing values or values outside the scale range (`geom_smooth()`).
##
## Pathway: Nitrogen metabolism
##
## Call:
## lm(formula = log(Growth) ~ sum, data = merged_pathway)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.68350 -0.74435 -0.06852 0.66812 2.51515
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.872e+00 1.326e+00 2.919 0.00517 **
## sum -1.073e-04 3.636e-05 -2.952 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.146 on 52 degrees of freedom
## Multiple R-squared: 0.1435, Adjusted R-squared: 0.127
## F-statistic: 8.712 on 1 and 52 DF, p-value: 0.004733
## sum moisture ph sphag Dim.1 oxic CN eco_index
## sum 1.000000000 0.45336439 0.33059945 0.1167798 0.04067987 -0.24598560 -0.002411176 0.3211999
## moisture 0.453364386 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.069678771 0.6790104
## ph 0.330599452 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.045455326 0.6320731
## sphag 0.116779820 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.204263645 0.6980154
## Dim.1 0.040679870 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.213797667 -0.4133396
## oxic -0.245985602 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.393304474 -0.7262251
## CN -0.002411176 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.000000000 -0.4059578
## eco_index 0.321199895 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.405957837 1.0000000
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
##
## Pathway: Sulfur metabolism
##
## Call:
## lm(formula = log(Growth) ~ sum, data = merged_pathway)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.60205 -0.75515 0.00845 0.82268 2.24989
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.882e+00 1.405e+00 3.475 0.001039 **
## sum -1.754e-04 5.001e-05 -3.507 0.000944 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.113 on 52 degrees of freedom
## Multiple R-squared: 0.1913, Adjusted R-squared: 0.1757
## F-statistic: 12.3 on 1 and 52 DF, p-value: 0.0009438
## sum moisture ph sphag Dim.1 oxic CN eco_index
## sum 1.00000000 0.43161595 0.42147045 0.2010074 0.00785757 -0.40426302 -0.09335756 0.4450066
## moisture 0.43161595 1.00000000 0.43568799 0.4366189 -0.35949111 -0.25681759 0.06967877 0.6790104
## ph 0.42147045 0.43568799 1.00000000 0.2438908 -0.33906204 -0.30907168 -0.04545533 0.6320731
## sphag 0.20100738 0.43661892 0.24389085 1.0000000 -0.13416082 -0.49279052 -0.20426364 0.6980154
## Dim.1 0.00785757 -0.35949111 -0.33906204 -0.1341608 1.00000000 -0.01288157 -0.21379767 -0.4133396
## oxic -0.40426302 -0.25681759 -0.30907168 -0.4927905 -0.01288157 1.00000000 0.39330447 -0.7262251
## CN -0.09335756 0.06967877 -0.04545533 -0.2042636 -0.21379767 0.39330447 1.00000000 -0.4059578
## eco_index 0.44500664 0.67901038 0.63207308 0.6980154 -0.41333962 -0.72622506 -0.40595784 1.0000000
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#LOOP for CAZy
library(dplyr)
library(ggplot2)
library(emmeans)
library(ggcorrplot)
# Define CAZy substrates: name used in data, y-axis label, and filename prefix
substrates <- list(
list("Cell Wall", "Microbial cell wall", "CellWall"),
list("Lignin", "Lignin", "Lignin"),
list("Oligosaccharides", "Oligosaccharides", "Oligosaccharides"),
list("Cellulose", "Cellulose", "Cellulose"),
list("Hemicellulose", "Hemicellulose", "Hemicellulose")
)
# Loop through each CAZy substrate
for (entry in substrates) {
substrate_name <- entry[[1]]
y_label <- entry[[2]]
filename_prefix <- entry[[3]]
# Filter data
df_sub <- cazy_substrate_sum %>%
group_by(Sample, Site, Treatment) %>%
filter(Substrate == substrate_name) %>%
filter(Sample %in% c("MGr3I", "MGr3H", "MGr3G", "MGr2F", "MGr2E", "MGr2D", "MGr1C", "MGr1B",
"MGr1A", "CRr3I", "CRr3H", "CRr3G", "CRr2F", "CRr2E", "CRr2D", "CRr1C",
"CRr1B", "CRr1A", "MHr3I", "MHr3H", "MHr3G", "MHr2F", "MHr2E", "MHr2D",
"MHr1C", "MHr1B", "MHr1A", "BOr3I", "BOr3H", "BOr3G", "BOr2F", "BOr2E",
"BOr2D", "BOr1C", "BOr1B", "BOr1A", "BAr3I", "BAr3H", "BAr3G", "BAr2F",
"BAr2E", "BAr2D", "BAr1C", "BAr1B", "BAr1A",
"LASCr2F", "LASCr2E", "LASCr2D",
"LABRr1C", "LABRr1B", "LABRr1A", "LASAr3I", "LASAr3G", "LASAr3H"))
# Merge with environmental data
merged_df <- merge(df_sub, env_data, by = "Sample")
# Plot
p <- ggplot(df_sub, aes(x = Treatment, y = reads_sum)) +
geom_boxplot(lwd = 1, outlier.shape = NA) +
geom_jitter(aes(color = Site), alpha = 0.7, width = 0.3, size = 3) +
ylab(paste0(y_label)) +
xlab("") +
theme_light(base_size = 14) +
theme(legend.position = "none", axis.text = element_text(size = 13, colour = "black")) +
scale_colour_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
scale_x_discrete(
limits = c("DAM", "REST", "NAT"),
labels = c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored")
) +
scale_y_continuous(labels = \(x) x / 1000)
ggsave(paste0("Figure_", filename_prefix, ".png"), plot = p, dpi = 1000, width = 4, height = 4, units = "in")
# Correlation with ecosystem health index
p2 <- ggplot(merged_df, aes(x = eco_index, y = reads_sum)) +
geom_point(color = "black", size = 4, alpha = 0.7, aes(shape = as.character(Treatment.x), fill = as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
scale_shape_manual(name = "Treatment", values = c(21, 22, 23), limits = c("NAT", "REST", "DAM"),
labels = c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored")) +
ylab(paste0(y_label)) +
scale_y_continuous(labels = function(x) x / 1000) +
labs(x = "Ecosystem health index") +
guides(fill = "none", shape = "none", color = "none") +
theme(legend.position = "none") +
theme_light(base_size = 14) +
theme(axis.text = element_text(size = 13, colour = "black"))
ggsave(paste0(filename_prefix, "_eco.png"), plot = p2, dpi = 1000, width = 4, height = 4, units = "in")
# Models and results
# Define output file name
output_file <- paste0("results_", filename_prefix, ".csv")
# Helper function to write each section
write_section <- function(header, df, file, append = TRUE) {
write(paste0("### ", header, " ###"), file = file, append = append)
write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, col.names = TRUE)
write("\n\n", file = file, append = TRUE)
}
# 1. Pairwise comparisons
# Pairwise contrasts using emmeans
library(emmeans)
emm <- emmeans(lm(reads_sum ~ Treatment.x * Site.x, data = merged_df), ~ Treatment.x)
pairwise_results <- summary(pairs(emm))
pairwise_df <- as.data.frame(pairwise_results)
write_section("Pairwise comparisons (Tukey-adjusted)", pairwise_df, output_file, append = FALSE)
# 2. ANOVA: Treatment × Site
# ANOVA interaction analysis
anova_model <- lm(reads_sum ~ Treatment.x * Site.x, data = merged_df)
anova_df <- as.data.frame(anova(anova_model))
anova_df$Effect <- rownames(anova_df)
write_section("ANOVA: Treatment × Site interaction", anova_df, output_file)
# 3. Environmental model: sum ~ moisture + ph + sphag + Dim.1 + oxic + CN
lm_env <- lm(reads_sum ~ moisture + ph + sphag + Dim.1 + oxic + CN, data = merged_df)
lm_env_df <- as.data.frame(summary(lm_env)$coefficients)
lm_env_df$Term <- rownames(lm_env_df)
write_section("Linear model: Environmental variables", lm_env_df, output_file)
# 4. Ecological index model: sum ~ eco_index
lm_eco <- lm(reads_sum ~ eco_index, data = merged_df)
eco_summary <- summary(lm_eco)
# Extract model fit stats
r_squared <- round(eco_summary$r.squared, 3)
adj_r_squared <- round(eco_summary$adj.r.squared, 3)
f_stat <- eco_summary$fstatistic
model_p <- round(pf(f_stat[1], f_stat[2], f_stat[3], lower.tail = FALSE), 4)
# Coefficients table
eco_df <- as.data.frame(eco_summary$coefficients)
eco_df$Term <- rownames(eco_df)
# Add model stats as additional rows
stats_df <- data.frame(
Estimate = NA,
`Std. Error` = NA,
`t value` = NA,
`Pr(>|t|)` = NA,
Term = c(
paste0("R² = ", r_squared),
paste0("Adjusted R² = ", adj_r_squared),
paste0("Model p-value = ", model_p)
)
)
# Combine and write
eco_df <- bind_rows(eco_df, stats_df)
write_section("Linear model: Ecosystem health index", eco_df, output_file)
}
## `geom_smooth()` using formula = 'y ~ x'
## NOTE: Results may be misleading due to involvement in interactions
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## `geom_smooth()` using formula = 'y ~ x'
## NOTE: Results may be misleading due to involvement in interactions
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## `geom_smooth()` using formula = 'y ~ x'
## NOTE: Results may be misleading due to involvement in interactions
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## `geom_smooth()` using formula = 'y ~ x'
## NOTE: Results may be misleading due to involvement in interactions
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## `geom_smooth()` using formula = 'y ~ x'
## NOTE: Results may be misleading due to involvement in interactions
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
## Warning in write.table(df, file = file, sep = ",", row.names = FALSE, append = TRUE, : appending column
## names to file
# Define CAZy substrates: name used in data, y-axis label, and filename prefix
substrates <- list(
list("Cell Wall", "Microbial cell wall", "CellWall"),
list("Lignin", "Lignin", "Lignin"),
list("Oligosaccharides", "Oligosaccharides", "Oligosaccharides"),
list("Cellulose", "Cellulose", "Cellulose"),
list("Hemicellulose", "Hemicellulose", "Hemicellulose")
)
# Loop through each CAZy substrate
for (entry in substrates) {
substrate_name <- entry[[1]]
y_label <- entry[[2]]
filename_prefix <- entry[[3]]
# Filter data
df_sub <- cazy_substrate_sum %>%
group_by(Sample, Site, Treatment) %>%
filter(Substrate == substrate_name) %>%
filter(Sample %in% c("MGr3I", "MGr3H", "MGr3G", "MGr2F", "MGr2E", "MGr2D", "MGr1C", "MGr1B",
"MGr1A", "CRr3I", "CRr3H", "CRr3G", "CRr2F", "CRr2E", "CRr2D", "CRr1C",
"CRr1B", "CRr1A", "MHr3I", "MHr3H", "MHr3G", "MHr2F", "MHr2E", "MHr2D",
"MHr1C", "MHr1B", "MHr1A", "BOr3I", "BOr3H", "BOr3G", "BOr2F", "BOr2E",
"BOr2D", "BOr1C", "BOr1B", "BOr1A", "BAr3I", "BAr3H", "BAr3G", "BAr2F",
"BAr2E", "BAr2D", "BAr1C", "BAr1B", "BAr1A",
"LASCr2F", "LASCr2E", "LASCr2D",
"LABRr1C", "LABRr1B", "LABRr1A", "LASAr3I", "LASAr3G", "LASAr3H"))
# Merge with environmental data
merged_df <- merge(df_sub, env_data, by = "Sample")
# Reorder Site.x
site_order <- c("Balmoral", "Bowness", "Moors_House", "Langwell", "Crocach", "Migneint")
merged_df$Site.x <- factor(merged_df$Site.x, levels = (site_order))
# Correlation with ecosystem health index
p2 <- ggplot(merged_df, aes(x = eco_index, y = reads_sum)) +
geom_point(color = "black", size = 4, alpha = 0.7, aes(shape = as.character(Treatment.x), fill = as.character(Site.x))) +
geom_smooth(method = "lm", color = "black", se = TRUE, linetype = "dashed", size = 0.7, alpha = 0.3) +
scale_fill_manual(values = c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "#ffff33")) +
scale_shape_manual(name = "Treatment", values = c(21, 22, 23), limits = c("NAT", "REST", "DAM"),
labels = c("DAM" = "Degraded", "NAT" = "Natural", "REST" = "Restored")) +
ylab(paste0(y_label)) +
scale_y_continuous(labels = function(x) x / 1000) +
labs(x = "Ecosystem health index") +
guides(fill = "none", shape = "none", color = "none") +
theme_light(base_size = 14) +
theme(
axis.text = element_text(size = 13, colour = "black"),
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
) +
facet_grid(. ~ Site.x, scales = "free")
ggsave(paste0(filename_prefix, "_eco.png"), plot = p2, dpi = 1000, width = 8, height = 3, units = "in")
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'